1 Overview

This script:

Part 1: visualizes and analyzes the within vs between multivariate patterns in 8 focal regions (left and right SMG, left and right STS, left and right V1, bilateral APC and RFC), along the following boundaries:

  • event within domain
  • event across domain
  • domain within event
  • domain across event

Part 2: Performs a region by region MVPA effect size analysis over domain-specific and domain-general regions.

Part 3: Same as part 1, but for other, non-focal ROIs

All instances of write_rds and similar have been commented out to avoid writing over existing files. Uncomment those statements in order to reproduce these steps and save (new) outputs.

knitr::opts_chunk$set(echo = TRUE)

library(pacman)
pacman::p_load(tidyverse,
               conflicted,
               cowplot,
               here,
               effsize,
               ggrepel,
               patchwork,
               ggpubr,
               boot,
               nptest,
               boot.pval,
               doMC
)

conflict_prefer("select", "dplyr")
## [conflicted] Will prefer dplyr::select over any other package
conflict_prefer("filter", "dplyr")
## [conflicted] Will prefer dplyr::filter over any other package
conflict_prefer("rename", "dplyr")
## [conflicted] Will prefer dplyr::rename over any other package
conflict_prefer("arrange", "dplyr")
## [conflicted] Will prefer dplyr::arrange over any other package
conflict_prefer("summarise", "dplyr")
## [conflicted] Will prefer dplyr::summarise over any other package
conflict_prefer("lmer", "lmerTest")
## [conflicted] Will prefer lmerTest::lmer over any other package
conflict_prefer("here", "here")
## [conflicted] Will prefer here::here over any other package
source(here("helper_funs.R"))

sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS  10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] doMC_1.3.8       iterators_1.0.12 foreach_1.5.0    boot.pval_0.4   
##  [5] nptest_1.0-3     boot_1.3-25      ggpubr_0.4.0     patchwork_1.1.2 
##  [9] ggrepel_0.8.2    effsize_0.8.0    here_1.0.1       cowplot_1.0.0   
## [13] conflicted_1.1.0 forcats_0.5.0    stringr_1.5.0    dplyr_1.0.9     
## [17] purrr_0.3.4      readr_1.3.1      tidyr_1.2.0      tibble_3.1.7    
## [21] ggplot2_3.4.1    tidyverse_1.3.0  pacman_0.5.1    
## 
## loaded via a namespace (and not attached):
##  [1] fs_1.5.2          lubridate_1.7.9   httr_1.4.2        rprojroot_2.0.3  
##  [5] tools_4.0.2       backports_1.4.1   bslib_0.4.2       utf8_1.2.2       
##  [9] R6_2.5.1          DBI_1.1.2         colorspace_2.0-3  withr_2.5.0      
## [13] tidyselect_1.1.2  curl_4.3          compiler_4.0.2    cli_3.5.0        
## [17] rvest_0.3.6       xml2_1.3.2        sass_0.4.4        scales_1.2.0     
## [21] digest_0.6.31     foreign_0.8-80    rmarkdown_2.19.2  rio_0.5.16       
## [25] pkgconfig_2.0.3   htmltools_0.5.4   dbplyr_1.4.4      fastmap_1.1.0    
## [29] rlang_1.0.6       readxl_1.3.1      rstudioapi_0.13   jquerylib_0.1.4  
## [33] generics_0.1.2    jsonlite_1.8.4    zip_2.2.0         car_3.0-8        
## [37] magrittr_2.0.3    Rcpp_1.0.8        munsell_0.5.0     fansi_1.0.3      
## [41] abind_1.4-5       lifecycle_1.0.3   stringi_1.7.8     yaml_2.3.6       
## [45] carData_3.0-4     grid_4.0.2        blob_1.2.1        crayon_1.5.1     
## [49] haven_2.4.3       hms_0.5.3         knitr_1.41        pillar_1.7.0     
## [53] ggsignif_0.6.3    codetools_0.2-16  reprex_0.3.0      glue_1.6.2       
## [57] evaluate_0.19     data.table_1.13.0 modelr_0.1.8      Rdpack_2.3.1     
## [61] vctrs_0.5.1       cellranger_1.1.0  gtable_0.3.0      assertthat_0.2.1 
## [65] cachem_1.0.6      xfun_0.36         openxlsx_4.1.5    rbibutils_2.2.8  
## [69] broom_0.8.0       rstatix_0.6.0     memoise_2.0.1     ellipsis_0.3.2

First we set up the data.

MVPA_processed_distances <- readRDS(here("outputs/multivariate_data/study2_MVPA_processed_distances_top100_runs12.Rds")) %>%
  filter(
         # row_domain != "both",
         # column_domain != "both",
         row_event != "fam",
         column_event != "fam",
         same_run == 0)

# repeat the following processing steps w this df to get results including distances within runs (exploratory)
MVPA_processed_distances_within_runs_only <- readRDS(here("outputs/multivariate_data/study2_MVPA_processed_distances_top100_runs12.Rds")) %>%
  filter(
         # row_domain != "both",
         # column_domain != "both",
         row_event != "fam",
         same_run == 1,
         column_event != "fam")

2 SETUP: Summarize between vs within distances

For all the steps below, enter either dataset from above to compute the distances, either only between runs (pre-registered) or only within runs (exploratory).

2.1 (1) Events Across Domains

event.across.domain.summarylong <- MVPA_processed_distances %>%
  filter(pair_label_event_across_domains != "drop_domain") %>%
  group_by(distance_metric,
           subj,
           ROI_name_final,
           pair_label_event_across_domains) %>%
  summarise(mean_dist = mean(dist))

event.across.domain.summarywide <-
  event.across.domain.summarylong %>%
  pivot_wider(
    id_cols = c(subj, ROI_name_final, distance_metric),
    names_from = pair_label_event_across_domains,
    values_from = mean_dist
  ) %>%
  mutate(diff = between - within,
         category = "event_across_domain")

results1_event.across.domain <- event.across.domain.summarywide %>% 
  group_by(distance_metric, ROI_name_final, category) %>%
  summarise(V = wilcox.test(diff, alternative = "greater", mu = 0)$statistic,
            p = wilcox.test(diff, alternative = "greater", mu = 0)$p.value,
            z = qnorm(p/2),
            r = abs(z/sqrt(32))) 

2.2 (2) Domains Across Events

domain.across.event.summarylong <- MVPA_processed_distances %>%
  filter(pair_label_domains_across_events != "drop_event") %>%
  group_by(distance_metric,
           subj,
           ROI_name_final,
           pair_label_domains_across_events) %>%
  summarise(mean_dist = mean(dist))

domain.across.event.summarywide <-
  domain.across.event.summarylong %>%
  pivot_wider(
    id_cols = c(subj, ROI_name_final, distance_metric),
    names_from = pair_label_domains_across_events,
    values_from = mean_dist
  ) %>%
  mutate(diff = between - within,
         category = "domain_across_event")


results2_domain.across.event <- domain.across.event.summarywide %>% 
  group_by(distance_metric, ROI_name_final, category) %>%
  summarise(V = wilcox.test(diff, alternative = "greater", mu = 0)$statistic,
            p = wilcox.test(diff, alternative = "greater", mu = 0)$p.value,
            z = qnorm(p/2),
            r = abs(z/sqrt(32))) 

2.3 (3) Events within domains

event.within.domain.summarylong <- MVPA_processed_distances %>%
  filter(pair_label_event_within_domain != "drop_domain") %>%
  group_by(distance_metric,
           subj,
           ROI_name_final,
           pair_label_event_within_domain) %>%
  summarise(mean_dist = mean(dist))

event.within.domain.summarywide <-
  event.within.domain.summarylong %>%
  pivot_wider(
    id_cols = c(subj, ROI_name_final, distance_metric),
    names_from = pair_label_event_within_domain,
    values_from = mean_dist
  ) %>%
  mutate(diff = between - within)

event.within.domain.perdomain.summarylong <-
  MVPA_processed_distances %>%
  filter(pair_label_event_within_domain != "drop_domain") %>%
  group_by(distance_metric,
           subj,
           ROI_name_final,
           same_domain,
           pair_label_event_within_domain) %>%
  summarise(mean_dist = mean(dist))


event.within.domain.perdomain.summarywide <-
  event.within.domain.perdomain.summarylong %>%
  pivot_wider(
    id_cols = c(subj, ROI_name_final, distance_metric, same_domain),
    names_from = pair_label_event_within_domain,
    values_from = mean_dist
  ) %>%
  mutate(diff = between - within,
         category = "event_within_domain")

results3_event.within.domain <- event.within.domain.perdomain.summarywide %>% 
  group_by(distance_metric, same_domain, ROI_name_final, category) %>%
  summarise(V = wilcox.test(diff, alternative = "greater", mu = 0)$statistic,
            p = wilcox.test(diff, alternative = "greater", mu = 0)$p.value,
            z = qnorm(p/2),
            r = abs(z/sqrt(32))) 

2.4 (4) Domains within events

domain.within.event.summarylong <- MVPA_processed_distances %>%
  filter(
      pair_label_domain_within_event != "drop_event"
  ) %>%
  group_by(distance_metric, subj, ROI_name_final, pair_label_domain_within_event) %>%
  summarise(mean_dist = mean(dist))

domain.within.event.summarywide <-
  domain.within.event.summarylong %>%
  pivot_wider(
    id_cols = c(subj, ROI_name_final, distance_metric),
    names_from = pair_label_domain_within_event,
    values_from = mean_dist
  ) %>%
  mutate(diff = between - within)

domain.within.event.perevent.summarylong <-
  MVPA_processed_distances %>%
  filter(
      pair_label_domain_within_event != "drop_event"
  ) %>%
  group_by(distance_metric,
           subj,
           ROI_name_final,
           same_event,
           pair_label_domain_within_event) %>%
  summarise(mean_dist = mean(dist))

domain.within.event.perevent.summarywide <-
  domain.within.event.perevent.summarylong %>%
  pivot_wider(
    id_cols = c(subj, ROI_name_final, distance_metric, same_event),
    names_from = pair_label_domain_within_event,
    values_from = mean_dist
  ) %>%
  mutate(diff = between - within,
         category = "domain_within_event")


results4_domain.within.event <- domain.within.event.perevent.summarywide %>% 
  group_by(distance_metric, same_event, ROI_name_final, category) %>%
  summarise(V = wilcox.test(diff, alternative = "greater", mu = 0)$statistic,
            p = wilcox.test(diff, alternative = "greater", mu = 0)$p.value,
            z = qnorm(p/2),
            r = abs(z/sqrt(32))) 

2.5 Combining MVPA results

MVPA_results0 <-
  rbind(
    results1_event.across.domain,
    results2_domain.across.event,
    results3_event.within.domain,  ## only relevant for between runs
    results4_domain.within.event
  ) %>%
  mutate(p_tails = "one",
         H0_mu = 0,
         H1 = "greater") %>%
  as.data.frame()

region_info <- read.csv(here("input_data/manyregions_info.csv"))

MVPA_results <- full_join(MVPA_results0, region_info) 

MVPA_results_out <- MVPA_results %>%
  mutate(p = round(p, digits = 3)) %>%
  mutate(star = as.factor(case_when(p< .001 ~ "***",
                        p < .01 ~ "**",
                        p < .05 ~ "*",
                        p < .1 ~ "~",
                        TRUE ~ " "))) %>%
  mutate(test = "one-tailed Wilcoxon signed rank test against mu = 0")

MVPA_results_out$ROI_category <- factor(MVPA_results_out$ROI_category, levels = c("physics", "psychology", "MD", "early_visual"))
# saveRDS(MVPA_results_out, here("outputs/multivariate_results/study2_MVPA_results_exploratory_across_runs12_including_psych_env.Rds"))
MVPA_results_persubject0 <-
  rbind(
    event.across.domain.summarywide,
    domain.across.event.summarywide,
    event.within.domain.perdomain.summarywide,  ## only relevant for between runs
    domain.within.event.perevent.summarywide
  ) %>%
  as.data.frame()

MVPA_results_persubject <- full_join(MVPA_results_persubject0, region_info)
MVPA_results_persubject$ROI_category <- factor(MVPA_results_persubject$ROI_category, levels = c("physics", "psychology", "MD", "early_visual"))

# saveRDS(MVPA_results_persubject, here("outputs/multivariate_results/study2_MVPA_results_exploratory_across_runs12_including_psych_env_persubject.Rds"))

2.6 Retain + export within and between distances for visualization, and join with noise ceiling

euclidean_noise_ceiling <- readRDS(here("outputs/multivariate_results/study2_MVPA_noiseceiling_withinrun_perROI.Rds")) %>%
  filter(cor_metric == "euclidean") %>%
  select(-cor_metric) 
MVPA_results_within_between_separate_prelim <-
  rbind(
    domain.within.event.summarylong,
    event.within.domain.summarylong,
    domain.within.event.perevent.summarylong,
    event.within.domain.perdomain.summarylong,
    domain.across.event.summarylong,
    event.across.domain.summarylong
  )

MVPA_results_withinrun_prelim2 <- left_join(MVPA_results_within_between_separate_prelim, euclidean_noise_ceiling)

MVPA_results_withinrun_final <- left_join(MVPA_results_withinrun_prelim2, region_info)

MVPA_results_withinrun_final$ROI_category <-
  factor(
    MVPA_results_withinrun_final$ROI_category,
    levels = c("physics", "psychology", "MD", "early_visual")
  )
# saveRDS(MVPA_results_withinrun_final, here("outputs/multivariate_results/study2_MVPA_results_withinruns12_with_noise_ceiling.Rds"))

3 PART 1 (Confirmatory): Focal regions

MVPA_results_focal <-
  readRDS(here("outputs/multivariate_results/study2_MVPA_results_betweenruns12.Rds")) %>%
  filter(focal_region == 1,
         distance_metric == "euclidean")

MVPA_results_focal_persubject <- 
  readRDS(here("outputs/multivariate_results/study2_MVPA_results_betweenruns12_persubject.Rds")) %>%
  filter(focal_region == 1,
         distance_metric == "euclidean")

MVPA_results_focal %>%
  group_by(ROI_name_final, category) %>%
  summarise(n = n())
## `summarise()` has grouped output by 'ROI_name_final'. You can override using
## the `.groups` argument.
MVPA_results_focal_persubject %>%
  group_by(ROI_name_final, category) %>%
  summarise(n = n())
## `summarise()` has grouped output by 'ROI_name_final'. You can override using
## the `.groups` argument.
DT::datatable(dplyr::arrange(MVPA_results_focal, category),  options = list(scrollX = TRUE))
plot_multivariate <- function(dataframe, dataframe_persubject, category_label, facet_label) {
  plot <-
    ggplot(
      dataframe_persubject %>% filter(
        category == {{category_label}},
        distance_metric == "euclidean"
      ) %>%
        arrange(ROI_category),
      aes(x = ROI_name_final, y = diff, fill = ROI_category)
    ) +
    stat_summary(stat = "mean_se()", geom = "bar") +
    geom_hline(yintercept = 0, linetype = "dotted") +
    stat_summary(
      geom = "errorbar",
      width = .1,
      fun.data = "mean_se",
      colour = "black"
    ) +
    ylab("Between - Within Euclidean Distance") +
    geom_text(
      data = dataframe %>% filter(
        category == {{category_label}},
        distance_metric == "euclidean"
      ),
      mapping = aes(label = star, x = ROI_name_final, y = -10),
      size = 7,
      colour = "red",
      family = "mono",
      # inherit.aes = FALSE
    ) +
    geom_point(alpha = .2) +
    theme_cowplot(10) +
    scale_fill_manual(values = c("#4894ff", "#f71d00", "#f8bf00", "#fb00d3")) +
    coord_flip() +
    ggtitle(paste0("Category boundary: ", category_label))
  
  if (!is.na(facet_label)) {
    return(plot + facet_wrap({{facet_label}}))
  }
  else {
    return(plot)
  }
}
plot_multivariate(MVPA_results_focal %>% filter(focal_region == 1, distance_metric == "euclidean"), MVPA_results_focal_persubject  %>% filter(focal_region == 1, distance_metric == "euclidean"), "domain_across_event", NA)
## Warning in stat_summary(stat = "mean_se()", geom = "bar"): Ignoring unknown
## parameters: `stat`
## No summary function supplied, defaulting to `mean_se()`

plot_multivariate(MVPA_results_focal %>% filter(focal_region == 1, distance_metric == "euclidean"), MVPA_results_focal_persubject  %>% filter(focal_region == 1, distance_metric == "euclidean"), "event_across_domain", NA)
## Warning in stat_summary(stat = "mean_se()", geom = "bar"): Ignoring unknown
## parameters: `stat`
## No summary function supplied, defaulting to `mean_se()`

plot_multivariate(MVPA_results_focal %>% filter(focal_region == 1), MVPA_results_focal_persubject  %>% filter(focal_region == 1), "domain_within_event", "same_event")
## Warning in stat_summary(stat = "mean_se()", geom = "bar"): Ignoring unknown
## parameters: `stat`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

plot_multivariate(MVPA_results_focal %>% filter(focal_region == 1), MVPA_results_focal_persubject  %>% filter(focal_region == 1), "event_within_domain", "same_domain")
## Warning in stat_summary(stat = "mean_se()", geom = "bar"): Ignoring unknown
## parameters: `stat`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

MVPA_results_within_between_separate_final <- read_rds( here("outputs/multivariate_results/study2_MVPA_results_betweenruns12_with_noise_ceiling.Rds"))


MVPA_focal_domainplot <-
  ggplot(
    MVPA_results_within_between_separate_final %>%
      filter(
        !is.na(pair_label_domains_across_events),
        distance_metric == "euclidean",
        focal_region == 1
      ),
    aes(pair_label_domains_across_events, mean_dist, fill = ROI_category)
  ) +
  # geom_boxplot() +
  stat_summary(stat = "mean_se()",
               geom = "bar",
               colour = "black") +
  stat_summary(stat = "mean_se()", geom = "errorbar", width = .2) +
  geom_point(alpha = .2) +
  scale_fill_manual(values = c("#4894ff", "#f71d00", "#f8bf00", "#fb00d3")) +
  facet_wrap(
    ~ factor(
      ROI_name_final,
      levels = c(
        "superiortemporal_L",
        "superiortemporal_R",
        "supramarginal_L",
        "supramarginal_R",
        "antParietal_bilateral",
        "precentral_inferiorfrontal_R",
        "V1_bilateral",
        "MT_bilateral"
      )
    ),
    labeller = labeller(ROI_name_final = label_wrap_gen(width = 10, multi_line = TRUE)),
    nrow = 2
  ) +
  xlab("Domains Across Events Category Boundary") +
  ylab("Euclidean Distance") +
  geom_hline(
    data = euclidean_noise_ceiling %>% filter(focal_region == 1),
    aes(
      yintercept = noiseceiling_upper,
      size = 1,
      alpha = 0.5
    )
  )
## Warning in stat_summary(stat = "mean_se()", geom = "bar", colour = "black"):
## Ignoring unknown parameters: `stat`
## Warning in stat_summary(stat = "mean_se()", geom = "errorbar", width = 0.2):
## Ignoring unknown parameters: `stat`
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
MVPA_focal_eventplot <-
  ggplot(
    MVPA_results_within_between_separate_final %>%
      filter(
        !is.na(pair_label_event_across_domains),
        distance_metric == "euclidean",
        focal_region == 1
      ),
    aes(pair_label_event_across_domains, mean_dist, fill = ROI_category)
  ) +
  # geom_boxplot() +
  stat_summary(stat = "mean_se()",
               geom = "bar",
               colour = "black") +
  stat_summary(stat = "mean_se()", geom = "errorbar", width = .2) +
  geom_point(alpha = .2) +
  xlab("Events Across Domains Category Boundary") +
  ylab("Euclidean Distance") +
  scale_fill_manual(values = c("#4894ff", "#f71d00", "#f8bf00", "#fb00d3")) +
  facet_wrap( ~ factor(
    ROI_name_final,
    levels = c(
      "superiortemporal_L",
      "superiortemporal_R",
      "supramarginal_L",
      "supramarginal_R",
      "antParietal_bilateral",
      "precentral_inferiorfrontal_R",
      "V1_bilateral",
      "MT_bilateral"
    )
  ), nrow = 2) +
  geom_hline(
    aes(
      yintercept = noiseceiling_upper,
      size = 1,
      alpha = 0.5
    ))
## Warning in stat_summary(stat = "mean_se()", geom = "bar", colour = "black"):
## Ignoring unknown parameters: `stat`
## Warning in stat_summary(stat = "mean_se()", geom = "errorbar", width = 0.2):
## Ignoring unknown parameters: `stat`
MVPA_results_withinrun_final <- readRDS(here("outputs/multivariate_results/study2_MVPA_results_withinruns12_with_noise_ceiling.Rds"))

MVPA_focal_eventplot <-
  ggplot(
    MVPA_results_withinrun_final %>%
      filter(
        !is.na(pair_label_event_across_domains),
        distance_metric == "euclidean",
        focal_region == 1
      ),
    aes(pair_label_event_across_domains, mean_dist, fill = ROI_category)
  ) +
  # geom_boxplot() +
  stat_summary(stat = "mean_se()",
               geom = "bar",
               colour = "black") +
  stat_summary(stat = "mean_se()", geom = "errorbar", width = .2) +
  geom_point(alpha = .2) +
  xlab("Events Across Domains Category Boundary") +
  ylab("Euclidean Distance") +
  scale_fill_manual(values = c("#4894ff", "#f71d00", "#f8bf00", "#fb00d3")) +
  facet_wrap( ~ factor(
    ROI_name_final,
    levels = c(
      "superiortemporal_L",
      "superiortemporal_R",
      "supramarginal_L",
      "supramarginal_R",
      "antParietal_bilateral",
      "precentral_inferiorfrontal_R",
      "V1_bilateral",
      "MT_bilateral"
    )
  ), nrow = 2) +
  coord_cartesian(ylim = c(0, 75)) +
  geom_hline(
    aes(
      yintercept = noiseceiling_upper,
      size = 1,
      alpha = 0.5
    ))
## Warning in stat_summary(stat = "mean_se()", geom = "bar", colour = "black"):
## Ignoring unknown parameters: `stat`
## Warning in stat_summary(stat = "mean_se()", geom = "errorbar", width = 0.2):
## Ignoring unknown parameters: `stat`
MVPA_focal_domainplot + MVPA_focal_eventplot
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

4 PART 2 (Confirmatory): Region by Region Effect Size

4.1 Domain specific regions

DS_results <- readRDS(here("outputs/multivariate_results/study2_MVPA_results_betweenruns12.Rds")) %>%
  filter(distance_metric == "euclidean_zscored") %>%
  filter(manyregions_region == 1) %>%
  filter(ROI_category == "psychology" | ROI_category == "physics") %>%
  pivot_wider(id_cols = c(ROI_name_final, ROI_category), names_from = c(category, same_domain, same_event), values_from = r) %>%
  mutate(domain = "specific")

colnames(DS_results) <- gsub("_NA","",colnames(DS_results))

DS_results_z <- DS_results %>%
  mutate_at(c("event_within_domain_physics", 
              "event_within_domain_psychology",
              "domain_within_event_exp",
              "domain_within_event_unexp"), scale)


DS_results_sqrt <- DS_results %>%
  mutate_at(c("event_within_domain_physics", 
              "event_within_domain_psychology",
              "domain_within_event_exp",
              "domain_within_event_unexp"), sqrt)


DS_events_within_domains <-
  ggplot(data = DS_results,
         aes(event_within_domain_psychology, event_within_domain_physics)) +
  geom_smooth(method = "glm", colour = "black") +
  geom_point(aes(colour = ROI_category), size = 3) +
  ylab("Effect size (r), Events within Domains, Physics") +
  xlab("Effect size (r), Events within Domains, Psychology") +
  # coord_cartesian(ylim = c(-1, 1),
  #                 xlim = c(-1, 1)) +
  geom_text_repel(aes(label = ROI_name_final), size = 3) +
  theme_cowplot(10) +
  scale_colour_manual(values = c("#4894ff", "#f71d00")) +
  ggtitle(label = "MVPA effect sizes per ROI, \nevents within domains, z-scored")

# f71d00 red
# 4894ff blue
# f8bf00 yellow
# fb00d3 pink

DS_domains_within_events <-
  ggplot(data = DS_results,
         aes(
           domain_within_event_exp,
           domain_within_event_unexp
         )) +
  geom_smooth(method = "glm", colour = "black") +
  geom_point(aes(colour = ROI_category), size = 3) +
  xlab("Effect size (r), Domains within Events, Expected") +
  ylab("Effect size (r), Domains within Events, Unexpected") +
  # coord_cartesian(ylim = c(0, 1),
  #                 xlim = c(0, 1)) +
  geom_text_repel(aes(label = ROI_name_final), size = 3) +
  theme_cowplot(10) +
  scale_colour_manual(values = c("#4894ff", "#f71d00")) +
  ggtitle(label = "MVPA effect sizes per ROI, \ndomains within events, z-scored")
DS_events_within_domains + DS_domains_within_events
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

4.2 Domain general regions

DG_results <- readRDS(here("outputs/multivariate_results/study2_MVPA_results_betweenruns12.Rds")) %>%
  filter(distance_metric == "euclidean_zscored") %>%
  filter(manyregions_region == 1) %>%
  filter(ROI_category == "MD" | ROI_category == "early_visual") %>%
  filter(!str_detect(ROI_name_final, "_bilateral")) %>% # use L/R regions, bilateral was for focal region analysis
  pivot_wider(id_cols = c(ROI_name_final, ROI_category), names_from = c(category, same_domain, same_event), values_from = r) %>%
  mutate(domain = "general")

colnames(DG_results) <- gsub("_NA","",colnames(DG_results))

DG_results_z <- DG_results %>%
  mutate_at(c("event_within_domain_physics", 
              "event_within_domain_psychology",
              "domain_within_event_exp",
              "domain_within_event_unexp"), scale)

DG_results_sqrt <- DG_results %>%
  mutate_at(c("event_within_domain_physics", 
              "event_within_domain_psychology",
              "domain_within_event_exp",
              "domain_within_event_unexp"), sqrt)

DG_events_within_domains <-
  ggplot(data = DG_results,
         aes(event_within_domain_psychology, event_within_domain_physics)) +
  geom_smooth(method = "glm", colour = "black") +
  geom_point(aes(colour = ROI_category), size = 3) +
  ylab("Effect size (r), Events within Domains, Physics") +
  xlab("Effect size (r), Events within Domains, Psychology") +
  # coord_cartesian(ylim = c(0, 1),
                  # xlim = c(0, 1)) +
  geom_text_repel(aes(label = ROI_name_final), size = 3) +
  theme_cowplot(10) +
  scale_colour_manual(values = c("#f8bf00", "#fb00d3")) +
  ggtitle(label = "MVPA effect sizes per ROI, \nevents within domains, z-scored")

DG_domains_within_events <-
  ggplot(data = DG_results,
         aes(
           domain_within_event_exp,
           domain_within_event_unexp
         )) +
  geom_smooth(method = "glm", colour = "black") +
  geom_point(aes(colour = ROI_category), size = 3) +
  xlab("Effect size (r), Domains within Events, Expected") +
  ylab("Effect size (r), Domains within Events, Unexpected") +
  # coord_cartesian(ylim = c(0, 1),
  #                 xlim = c(0, 1)) +
  geom_text_repel(aes(label = ROI_name_final), size = 3) +
  theme_cowplot(10) +
  scale_colour_manual(values = c("#f8bf00", "#fb00d3")) +
  ggtitle(label = "MVPA effect sizes per ROI, \ndomains within events, z-scored")
DG_events_within_domains + DG_domains_within_events
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

4.3 Statistics

# instead of testing whether the linear relationship between x and y is 0, 
# we test for independence instead. H0 is that x and y are independent; F_{XY}(x,y) = F_X(x) F_Y(y).
observed_cors_ind <- rbind(DS_results, DG_results) %>%
  group_by(domain) %>%
  summarise(
    cor_domain_within_event =
      np.cor.test(
        domain_within_event_exp,
        domain_within_event_unexp,
        alternative = "greater",
        independent = TRUE
      )$estimate,
    
    p_domain_within_event =
      np.cor.test(
        domain_within_event_exp,
        domain_within_event_unexp,
        alternative = "greater",
        independent = TRUE
      )$p.value,
    
    cor_event_within_domain =
      np.cor.test(
        DG_results$event_within_domain_psychology,
        DG_results$event_within_domain_physics,
        alternative = "greater",
        independent = TRUE
      )$estimate,
    
    p_event_within_domain =
      np.cor.test(
        DG_results$event_within_domain_psychology,
        DG_results$event_within_domain_physics,
        alternative = "greater",
        independent = TRUE
      )$p.value
  )

observed_cors_ind
# write_rds(observed_cors_ind, path = here("outputs/multivariate_results/study2_MVPA_manyregions_observed_cor.Rds"))
set.seed(2020)
## First define a function to work out the difference of corrs:
diff_corr <- function(data, indices) {
  data <- data[indices,]
  cor1 <-
    np.cor.test(data$domain_within_event_exp,
                data$domain_within_event_unexp,
                alternative = "greater",
                independent = TRUE,
                parallel = TRUE)$estimate
  cor2 <-
    np.cor.test(
      data$event_within_domain_psychology,
      data$event_within_domain_physics,
      alternative = "greater",
      independent = TRUE,
      parallel = TRUE)$estimate

    return(cor1 - cor2)
}

4.3.1 Domain specific

doMC::registerDoMC(cores = 2)  # for running in parallel

# Then apply a bootstrap procedure with 4000 draws (uncomment to reproduce):
# res_boot_DS <- boot(
#   data = DS_results,
#   R = 4000,
#   statistic = diff_corr,
#   stype = "i"
# )

# saveRDS(res_boot_DS, here("outputs/multivariate_results/study2_MVPA_dsregions_4000perms_confirmatory.Rds"))

res_boot_DS <- read_rds(here("outputs/multivariate_results/study2_MVPA_dsregions_4000perms_confirmatory.Rds"))

## Retrieve the empirical 95% confidence interval:
ds_results <- append(boot.ci(res_boot_DS, type = "perc", conf = 0.95),boot.pval(res_boot_DS))

# saveRDS(ds_results, here("outputs/multivariate_results/study2_MVPA_dsregions_summary.Rds"))

4.3.2 Domain general

## Then apply a bootstrap procedure with 4000 draws (uncomment to reproduce):
# res_boot_DG <- boot(data = DG_results,
#                  R = 4000,
#                  statistic = diff_corr,
#                  stype = "i")

# saveRDS(res_boot_DG, here("outputs/multivariate_results/study2_MVPA_dgregions_4000perms_confirmatory.Rds"))
res_boot_DG <- readRDS(here("outputs/multivariate_results/study2_MVPA_dgregions_4000perms_confirmatory.Rds"))

plot(res_boot_DG)

## Retrieve the empirical 95% confidence interval (adjusted to 90% because of one-tailed prediction):

dg_results <- append(boot.ci(res_boot_DG, type = "perc", conf = 0.95),boot.pval(res_boot_DG))
saveRDS(dg_results, here("outputs/multivariate_results/study2_MVPA_dgregions_summary.Rds"))

5 PART 3 (Exploratory): All regions

MVPA_results_all <- readRDS(here("outputs/multivariate_results/study2_MVPA_results_betweenruns12.Rds")) %>%
  filter(distance_metric == "euclidean")

MVPA_results_all_persubject <- readRDS(here("outputs/multivariate_results/study2_MVPA_results_betweenruns12_persubject.Rds")) %>%
  filter(distance_metric == "euclidean")

MVPA_results_all %>%
  group_by(ROI_name_final, category) %>%
  summarise(n = n())
## `summarise()` has grouped output by 'ROI_name_final'. You can override using
## the `.groups` argument.
MVPA_results_all_persubject %>%
  group_by(ROI_name_final, category) %>%
  summarise(n = n())
## `summarise()` has grouped output by 'ROI_name_final'. You can override using
## the `.groups` argument.
plot_multivariate_all <- function(category_label, facet_label) {
  plot <-
    ggplot(
      MVPA_results_all_persubject %>% filter(
        category == {{category_label}},
        distance_metric == "euclidean"
      ),
      aes(x = ROI_name_final, y = diff, fill = ROI_category)
    ) +
    stat_summary(stat = "mean_se()", geom = "bar") +
    geom_hline(yintercept = 0, linetype = "dotted") +
    stat_summary(
      geom = "errorbar",
      width = .1,
      fun.data = "mean_se",
      colour = "black"
    ) +
    ylab("Between - Within Euclidean Distance") +
    geom_text(
      data = MVPA_results_all %>% filter(
        category == {{category_label}},
        distance_metric == "euclidean"
      ),
      mapping = aes(label = star, x = ROI_name_final, y = -10),
      size = 7,
      colour = "red",
      family = "mono",
      # inherit.aes = FALSE
    ) +
    geom_point(alpha = .2) +
    theme_cowplot(10) +
    scale_fill_manual(values = c("#2eafb9", "#f74d09", "#f2de1e", "#f269f5")) +
    coord_flip() +
    ggtitle(paste0("Category boundary: ", category_label))
  
  if (!is.na(facet_label)) {
    return(plot + facet_wrap(vars(.data[[facet_label]])))
  }
  else {
    return(plot)
  }
}

plot_multivariate_all("domain_across_event", NA)
## Warning in stat_summary(stat = "mean_se()", geom = "bar"): Ignoring unknown
## parameters: `stat`
## No summary function supplied, defaulting to `mean_se()`

plot_multivariate_all("event_across_domain", NA)
## Warning in stat_summary(stat = "mean_se()", geom = "bar"): Ignoring unknown
## parameters: `stat`
## No summary function supplied, defaulting to `mean_se()`

plot_multivariate_all("domain_within_event", "same_event")
## Warning in stat_summary(stat = "mean_se()", geom = "bar"): Ignoring unknown
## parameters: `stat`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

plot_multivariate_all("event_within_domain", "same_domain")
## Warning in stat_summary(stat = "mean_se()", geom = "bar"): Ignoring unknown
## parameters: `stat`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`